pacman::p_load(tidyverse, ggalluvial, ggplot2, forcats, hhi, xtable, gridExtra,ggpubr,grid,ggrepel,questionr,texreg)
#setwd("")#set working directory 

dta22 <- read_rds("iscap.Rdata")

#figure 1 -----

#general election vote
# Each lookup key maps a raw survey answer to the short label used in the
# figures. Answers that are not listed in a key are set to NA.
pooled.general <- "Other/DK/\nWouldn't Vote"

key.general.20 <- c(
  "Joe Biden, the Democrat" = "Biden",
  "Would not vote for President" = pooled.general,
  "Donald Trump, the Republican" = "Trump",
  "Other (SPECIFY)" = pooled.general
)

key.general.16 <- c(
  "Jill Stein and Ajamu Baraka, the Green Party candidates" = pooled.general,
  "Gary Johnson and Bill Weld, the Libertarians" = pooled.general,
  "Hillary Clinton, the Democrat" = "Clinton",
  "Donald Trump, the Republican" = "Trump",
  "Other (SPECIFY)" = pooled.general,
  "Would not vote for President" = pooled.general
)

key.general.08 <- c(
  "Barack Obama, the Democrat" = "Obama",
  "Other (specify)" = pooled.general,
  "John McCain, the Republican" = "McCain",
  "Ralph Nader" = pooled.general,
  "Don't know" = pooled.general
)

# Columns are created in the order 2020, 2016, 2008.
dta22$votechoice.20 <- dplyr::recode(
  as.character(dta22$VP5_A_1_15), !!!key.general.20,
  .default = NA_character_
)
dta22$votechoice.16 <- dplyr::recode(
  as.character(dta22$V5_A_16_10), !!!key.general.16,
  .default = NA_character_
)
dta22$votechoice.08 <- dplyr::recode(
  as.character(dta22$VP5_3), !!!key.general.08,
  .default = NA_character_
)

#primary vote choice: republican
# Non-answers in the 2016 item are set to NA; the 2008 item is copied as-is.
raw.rep.16 <- as.character(dta22$VP1A_REP_16_10)
dta22$votechoice.rep.16 <- replace(
  raw.rep.16,
  raw.rep.16 %in% c("Refused", "I wouldn't vote in the Republican primary"),
  NA_character_
)
dta22$votechoice.rep.08 <- dta22$VP1A_REP_1

#primary vote choice: democrat
raw.dem.20 <- as.character(dta22$VP1A_DEM_14)
dta22$votechoice.dem.20 <- replace(
  raw.dem.20,
  raw.dem.20 %in% c("Refused"),
  NA_character_
)

raw.dem.16 <- as.character(dta22$VP1A_DEM_16_10)
dta22$votechoice.dem.16 <- replace(
  raw.dem.16,
  raw.dem.16 %in% c("I wouldn't vote in the Democratic primary", "Refused"),
  NA_character_
)

#collapse minor candidates for plot
raw.dem.08 <- as.character(dta22$VP1A_DEM_2)
dta22$votechoice.dem.08 <- replace(
  raw.dem.08,
  raw.dem.08 %in% c("Chris Dodd", "Mike Gravel", "Bill Richardson", "Joe Biden"),
  "Someone else"
)

### general election -----
# Count respondents for every combination of 2008/2016/2020 general-election
# choice and ideology category.
sub.non.na <- dta22 %>%
  group_by(votechoice.08, votechoice.16, votechoice.20, ideology_4) %>%
  dplyr::summarize(count = n()) %>%
  # Relevel on the ungrouped table: within a group each vote column holds a
  # single value, so a grouped fct_relevel() cannot impose a shared order.
  ungroup() %>%
  mutate(
    # The pooled label must match the recoded value exactly, including its
    # line break ("\n"); otherwise fct_relevel() treats it as an unknown level.
    votechoice.08 = fct_relevel(votechoice.08, "Obama", "McCain", "Other/DK/\nWouldn't Vote"),
    votechoice.16 = fct_relevel(votechoice.16, "Clinton", "Trump", "Other/DK/\nWouldn't Vote"),
    votechoice.20 = fct_relevel(votechoice.20, "Biden", "Trump", "Other/DK/\nWouldn't Vote")
  ) %>%
  # Restore the grouping that summarize() leaves behind so that later code
  # sees the same grouped structure as before.
  group_by(votechoice.08, votechoice.16, votechoice.20)

# Collapse the ideology categories into three groups; anything else is NA.
sub.non.na$ideology_group <- factor(
  ifelse(sub.non.na$ideology_4 %in% c("extreme lib", "lib"), "Liberal",
         ifelse(sub.non.na$ideology_4 %in% c("slight lib", "moderate", "slight con"), "Moderate",
                ifelse(sub.non.na$ideology_4 %in% c("extreme con", "con"), "Conservative", NA))),
  levels = c("Liberal", "Moderate", "Conservative")
)

sub.w.na <- sub.non.na #retain NAs in a separate dataframe
sub.non.na <- sub.non.na %>%
  na.omit(.)

sum(sub.non.na$count) #937 non-NA obs incl. ideo

### republican primaries -----
# Count respondents by 2008 and 2016 Republican primary choice and ideology.
sub.rep.non.na <- dplyr::summarize(
  group_by(dta22, votechoice.rep.08, votechoice.rep.16, ideology_4),
  count = n()
)

# Three-way ideology grouping; categories not listed (and missing values)
# become NA.
# NOTE(review): "Convservative" is misspelled in the level name. It is kept
# unchanged here because the figure supplies its own legend labels and other
# code may match on the exact level string.
rep.ideo <- sub.rep.non.na$ideology_4
sub.rep.non.na$ideology_group <- factor(
  dplyr::case_when(
    rep.ideo %in% c("slight lib", "lib") ~ "Slightly Liberal/\nLiberal",
    rep.ideo %in% c("moderate", "slight con") ~ "Moderate/\nSlightly Convservative",
    rep.ideo %in% c("extreme con", "con") ~ "Conservative/\nExtremely Conservative",
    TRUE ~ NA_character_
  ),
  levels = c(
    "Conservative/\nExtremely Conservative",
    "Moderate/\nSlightly Convservative",
    "Slightly Liberal/\nLiberal"
  )
)

sub.rep.w.na <- sub.rep.non.na #retain NAs in a separate dataframe; needed for Figure 2
sub.rep.non.na <- na.omit(sub.rep.non.na)
sum(sub.rep.non.na$count) #443 non-NA obs incl. ideo

### democratic primaries -----
# Count respondents by 2008, 2016 and 2020 Democratic primary choice and
# ideology.
sub.dem.non.na <- dplyr::summarize(
  group_by(dta22, votechoice.dem.08, votechoice.dem.16, votechoice.dem.20, ideology_4),
  count = n()
)

# Three-way ideology grouping; categories not listed (and missing values)
# become NA.
dem.ideo <- sub.dem.non.na$ideology_4
sub.dem.non.na$ideology_group <- factor(
  dplyr::case_when(
    dem.ideo %in% c("slight con", "con") ~ "Slightly Conservative/\nConservative",
    dem.ideo %in% c("moderate", "slight lib") ~ "Moderate/\nSlightly Liberal",
    dem.ideo %in% c("extreme lib", "lib") ~ "Liberal/\nExtremely Liberal",
    TRUE ~ NA_character_
  ),
  levels = c(
    "Liberal/\nExtremely Liberal",
    "Moderate/\nSlightly Liberal",
    "Slightly Conservative/\nConservative"
  )
)

sub.dem.w.na <- sub.dem.non.na #retain NAs in a separate dataframe; needed for Figure 2
sub.dem.non.na <- na.omit(sub.dem.non.na)
sum(sub.dem.non.na$count) #331 total non-NA obs incl. ideo

### plot -----
# Democratic-primary alluvial panel: flows across the 2008, 2016 and 2020
# primary choices, filled by ideology group.
p1 <- ggplot(as.data.frame(sub.dem.non.na),
             aes(y = count, axis1 = votechoice.dem.08, axis2 = votechoice.dem.16, axis3 = votechoice.dem.20)) +
  geom_alluvium(aes(fill = ideology_group), width = 1/12) +
  # Legend labels are matched to the factor levels by position, so they must
  # follow the level order of ideology_group (liberal, moderate, conservative).
  # Listing them in the opposite order swaps the liberal and conservative
  # legend entries.
  scale_fill_discrete(name = "", labels = c("Liberal/\nExtremely Liberal", "Moderate/\nSlightly Liberal", "Slightly Conservative/\nConservative")) +
  geom_stratum(width = 0.05, fill = "black", color = "grey") +
  geom_label_repel(stat = "stratum", aes(label = after_stat(stratum)),
                   box.padding = 0.2,
                   point.padding = 0.2,
                   size = 3,
                   max.overlaps = Inf) +
  # The N in the title is the number of respondents left after dropping NAs.
  ggtitle(paste0("Democratic Primary (N=",sum(sub.dem.non.na$count),")")) +
  scale_x_discrete(limits = c("2008", "2016", "2020"), expand = c(0.15, 0.15)) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5, size = 20, face = "italic"),
        legend.position = "top",
        plot.margin = margin(10, 10, 10, 10),
        legend.text = element_text(size = 14),
        legend.title = element_text(size = 16),
        axis.title.y = element_text(size = 16),
        axis.text.y = element_text(size = 14),
        axis.text.x = element_text(size = 16)) +
  ylab("Frequency")

# Republican-primary alluvial panel: flows from the 2008 to the 2016 primary
# choice, filled by ideology group.
rep.panel.theme <- theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 20, face = "italic"),
    legend.position = "top",
    plot.margin = margin(10, 10, 10, 10),
    legend.text = element_text(size = 14),
    legend.title = element_text(size = 16),
    axis.title.y = element_text(size = 16),
    axis.text.y = element_text(size = 14),
    axis.text.x = element_text(size = 16)
  )

p1.5 <- ggplot(
  as.data.frame(sub.rep.non.na),
  aes(y = count, axis1 = votechoice.rep.08, axis2 = votechoice.rep.16)
) +
  geom_alluvium(aes(fill = ideology_group), width = 1/12) +
  geom_stratum(width = 0.05, fill = "black", color = "grey") +
  geom_label_repel(
    stat = "stratum",
    aes(label = after_stat(stratum)),
    box.padding = 0.2,
    point.padding = 0.2,
    size = 3,
    max.overlaps = Inf
  ) +
  # The labels below follow the level order of ideology_group.
  scale_fill_discrete(
    name = "",
    labels = c(
      "Conservative/\nExtremely Conservative",
      "Moderate/\nSlightly Conservative",
      "Slightly Liberal/\nLiberal"
    )
  ) +
  scale_x_discrete(limits = c("2008", "2016"), expand = c(0.15, 0.15)) +
  labs(
    title = paste0("Republican Primary (N=", sum(sub.rep.non.na$count), ")"),
    y = "Frequency"
  ) +
  rep.panel.theme

# General-election alluvial panel: flows across the 2008, 2016 and 2020 vote
# choices, filled by ideology group.
p2 <- ggplot(as.data.frame(sub.non.na),
             aes(y = count, axis1 = votechoice.08, axis2 = votechoice.16, axis3=votechoice.20)) +
  geom_alluvium(aes(fill = ideology_group), width = 1/12) +
  # Legend labels are matched to the factor levels by position, so they must
  # follow the level order of ideology_group ("Liberal", "Moderate",
  # "Conservative"). Listing them in the opposite order swaps the liberal and
  # conservative legend entries.
  scale_fill_discrete(name = "", labels = c("Extremely Liberal/\nLiberal","Slightly Liberal/\nModerate/\nSlightly Conservative","Conservative/\nExtremely Conservative")) +
  geom_stratum(width = 0.05, fill = "black", color = "grey") +
  geom_label(stat = "stratum", aes(label = after_stat(stratum))) +
  scale_x_discrete(limits = c("2008", "2016", "2020"), expand = c(0.15, 0.15)) +
  # The N in the title is the number of respondents left after dropping NAs.
  ggtitle(paste0("General (N=",sum(sub.non.na$count),")")) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5, size = 20, face = "italic"),
        legend.position = "top",
        plot.margin = margin(10, 10, 10, 10),
        legend.text = element_text(size = 14),
        legend.title = element_text(size = 16),
        axis.title.y = element_text(size = 16),
        axis.text.y = element_text(size = 14),
        axis.text.x = element_text(size = 16)) +
  ylab("Frequency")

# Stack the three alluvial panels in one column and write them to figure1.pdf.
pdf("figure1.pdf", width = 8, height = 10)
fig1.heading <- textGrob(
  "Democratic Primary vs. General Election",
  gp = gpar(fontsize = 30, fontface = "bold")
)
grid.arrange(p1, p1.5, p2, ncol = 1, heights = c(3, 3, 3), top = fig1.heading)
dev.off()

# figure 2 ------ 

#democratic primary transitions 2008-2016
# Keep respondents with a stated choice in both years and sum the counts over
# ideology.
subh.dem.exp.08.16 <- sub.dem.w.na %>%
  filter(!is.na(votechoice.dem.08), !is.na(votechoice.dem.16)) %>%
  group_by(votechoice.dem.08, votechoice.dem.16) %>% #collapsing over ideology
  dplyr::summarize(count = sum(count, na.rm = TRUE))
subh.dem.exp.08.16
n.dem.exp.08.16 <- sum(subh.dem.exp.08.16$count)
n.dem.exp.08.16 #547 obs w/ ideo NAs

# Convert each cell to a percentage of the whole table (not of its row).
subh.dem.exp.08.16 <- mutate(
  subh.dem.exp.08.16,
  count = round((count / n.dem.exp.08.16) * 100, 2)
)

# Democratic primary transitions 2016-2020: respondents with a stated choice in
# both years, summed over ideology.
subh.dem.exp.16.20 <- sub.dem.w.na %>%
  filter(!is.na(votechoice.dem.16), !is.na(votechoice.dem.20)) %>%
  group_by(votechoice.dem.16, votechoice.dem.20) %>% #collapsing over ideology
  dplyr::summarize(count = sum(count, na.rm = TRUE))
n.dem.exp.16.20 <- sum(subh.dem.exp.16.20$count)
n.dem.exp.16.20 #406 obs w/ ideo NAs

# Convert each cell to a percentage of the whole table (not of its row).
subh.dem.exp.16.20 <- mutate(
  subh.dem.exp.16.20,
  count = round((count / n.dem.exp.16.20) * 100, 2)
)

# Republican primary transitions 2008-2016: respondents with a stated choice in
# both years, summed over ideology.
subh.rep.exp.08.16 <- sub.rep.w.na %>%
  filter(!is.na(votechoice.rep.08), !is.na(votechoice.rep.16)) %>%
  group_by(votechoice.rep.08, votechoice.rep.16) %>%
  dplyr::summarize(count = sum(count, na.rm = TRUE))
n.rep.exp.08.16 <- sum(subh.rep.exp.08.16$count)
n.rep.exp.08.16 #445 obs w/ ideo NAs

# Convert each cell to a percentage of the whole table (not of its row).
subh.rep.exp.08.16 <- mutate(
  subh.rep.exp.08.16,
  count = round((count / n.rep.exp.08.16) * 100, 2)
)

# Print the three percentage tables behind figure 2.
subh.dem.exp.08.16 #figure 2 top panel
subh.dem.exp.16.20 #figure 2 middle panel
subh.rep.exp.08.16 #figure 2 bottom panel
subh.dem.exp.08.16
# Build one transition heatmap for figure 2.
#
# data:    transition table with a `count` column holding the cell percentages.
# mapping: aes() giving the x (later year), y (earlier year) and fill columns.
# low/high: end colours of the fill gradient.
# title:   plot title.
# Returns a ggplot object.
transition_heatmap <- function(data, mapping, low, high, title) {
  ggplot(data, mapping) +
    geom_tile() +
    geom_text(aes(label = count), color = "white", size = 5) +
    scale_fill_gradient(low = low, high = high) +
    labs(title = title) +
    theme_minimal() +
    theme(
      plot.title = element_text(size = 20, face = "bold", hjust = 0.5),
      axis.text.x = element_text(angle = 45, hjust = 1),
      axis.text.y = element_text(angle = 45, hjust = 1),
      axis.title = element_blank(),
      # NOTE(review): axis.text is blanked while axis.text.x/.y are also set
      # explicitly above; confirm which of the two is intended. Kept exactly
      # as in the original theme so the rendering does not change.
      axis.text = element_blank(),
      axis.ticks = element_blank(),
      panel.grid = element_blank(),
      legend.position = "none"
    )
}

subh.dem.exp.08.16.p <- transition_heatmap(
  subh.dem.exp.08.16,
  aes(x = votechoice.dem.16, y = votechoice.dem.08, fill = count),
  low = "lightblue", high = "darkblue",
  title = "Primary Preference Transitions, Democratic Primary\n2008-2016"
)

subh.dem.exp.16.20.p <- transition_heatmap(
  subh.dem.exp.16.20,
  aes(x = votechoice.dem.20, y = votechoice.dem.16, fill = count),
  low = "lightblue", high = "darkblue",
  title = "Primary Preference Transitions, Democratic Primary\n2016-2020"
)

subh.rep.exp.08.16.p <- transition_heatmap(
  subh.rep.exp.08.16,
  aes(x = votechoice.rep.16, y = votechoice.rep.08, fill = count),
  low = "#FF6666", high = "darkred",
  title = "Primary Preference Transitions, Republican Primary\n2008-2016"
)

# Stack the three heatmaps in one column and write them to figure2.pdf.
pdf("figure2.pdf", width = 8, height = 12)
grid.arrange(subh.dem.exp.08.16.p, subh.dem.exp.16.20.p, subh.rep.exp.08.16.p, ncol = 1)
dev.off()

# figure A1-----
# Raw (unscaled) transition counts for each pair of adjacent primaries, keeping
# only respondents with a stated choice in both years.
subh.gop.exp.08.16.within <- sub.rep.w.na %>%
  filter(!is.na(votechoice.rep.08), !is.na(votechoice.rep.16)) %>%
  group_by(votechoice.rep.08, votechoice.rep.16) %>%
  dplyr::summarize(count = sum(count, na.rm = TRUE))

subh.dem.exp.08.16.within <- sub.dem.w.na %>%
  filter(!is.na(votechoice.dem.08), !is.na(votechoice.dem.16)) %>%
  group_by(votechoice.dem.08, votechoice.dem.16) %>%
  dplyr::summarize(count = sum(count, na.rm = TRUE))

subh.dem.exp.16.20.within <- sub.dem.w.na %>%
  filter(!is.na(votechoice.dem.16), !is.na(votechoice.dem.20)) %>%
  group_by(votechoice.dem.16, votechoice.dem.20) %>%
  dplyr::summarize(count = sum(count, na.rm = TRUE))

# collapse small cand.
# Relabel the minor 2008 candidates as "Other", then re-aggregate so that each
# 2008 x 2016 pair appears once.
minor.dem.08 <- c("Dennis Kucinich", "Someone else")
subh.dem.exp.08.16.within$votechoice.dem.08[
  subh.dem.exp.08.16.within$votechoice.dem.08 %in% minor.dem.08
] <- "Other"
subh.dem.exp.08.16.within <- dplyr::summarize(
  group_by(subh.dem.exp.08.16.within, votechoice.dem.08, votechoice.dem.16),
  count = sum(count, na.rm = TRUE)
)

# collapse small cand.
# Same treatment for the minor 2020 candidates.
minor.dem.20 <- c("Deval Patrick", "Someone else", "Andrew Yang", "Tom Steyer")
subh.dem.exp.16.20.within$votechoice.dem.20[
  subh.dem.exp.16.20.within$votechoice.dem.20 %in% minor.dem.20
] <- "Other"
subh.dem.exp.16.20.within <- dplyr::summarize(
  group_by(subh.dem.exp.16.20.within, votechoice.dem.16, votechoice.dem.20),
  count = sum(count, na.rm = TRUE)
)

sum(subh.dem.exp.08.16.within$count)#547 obs
sum(subh.dem.exp.16.20.within$count)#406 obs

# Share of the 2008-2016 Democratic panel accounted for by a set of transition
# rows: the sum of their counts over the panel total.
share.dem.08.16 <- function(rows) {
  sum(rows$count) / sum(subh.dem.exp.08.16.within$count)
}

share.dem.08.16(filter(
  subh.dem.exp.08.16.within,
  votechoice.dem.08 == "Hillary Clinton",
  votechoice.dem.16 == "Hillary Clinton"
)) #31% with Clinton in both cases

share.dem.08.16(filter(
  subh.dem.exp.08.16.within,
  votechoice.dem.08 == "Hillary Clinton",
  votechoice.dem.16 == "Bernie Sanders"
)) #11% with Clinton then Sanders

share.dem.08.16(filter(
  subh.dem.exp.08.16.within,
  votechoice.dem.08 == "Barack Obama",
  votechoice.dem.16 == "Hillary Clinton"
)) #26% with Obama then Clinton

share.dem.08.16(filter(
  subh.dem.exp.08.16.within,
  votechoice.dem.08 == "Barack Obama",
  votechoice.dem.16 == "Bernie Sanders"
)) #23% with Obama then Sanders

# Clinton in either year (the condition is an OR, so "at least once").
share.dem.08.16(filter(
  subh.dem.exp.08.16.within,
  votechoice.dem.08 == "Hillary Clinton" | votechoice.dem.16 == "Hillary Clinton"
)) #72% with Clinton once

# Share choosing one of the listed 2008 candidates and then O'Malley or Clinton
# in 2016, rebuilt from the uncollapsed table so that "Someone else" keeps its
# original meaning.
dem.08.16.listed <- sub.dem.w.na %>%
  filter(!is.na(votechoice.dem.08), !is.na(votechoice.dem.16)) %>%
  group_by(votechoice.dem.08, votechoice.dem.16) %>%
  dplyr::summarize(count = sum(count, na.rm = TRUE)) %>%
  filter(
    votechoice.dem.08 %in% c("Hillary Clinton", "Someone else", "John Edwards"), #someone else includes Biden, Dodd, Gravel, and Richardson
    votechoice.dem.16 %in% c("Martin O'Malley", "Hillary Clinton")
  )
share.dem.08.16(dem.08.16.listed)

# Cross-tabulate 2008 and 2016 Democratic primary choices by faction:
# 1 = left wing, 0 = moderate wing, NA = candidate not assigned to either.
# `fraction` is computed over every row, including the NA-faction rows, before
# na.omit() drops them, so the retained fractions need not sum to 1.
matrix.dems.08.16<-sub.dem.w.na %>%
  filter(!is.na(votechoice.dem.08)) %>% 
  filter(!is.na(votechoice.dem.16)) %>% 
  dplyr::select(votechoice.dem.16, votechoice.dem.08, count)%>%
  mutate(votechoice.dem.08.left=NA)%>%
  mutate(votechoice.dem.16.left=NA)%>%
  mutate(votechoice.dem.08.left=ifelse(votechoice.dem.08%in%c("Barack Obama", "Dennis Kucinich"),1,votechoice.dem.08.left))%>%
  mutate(votechoice.dem.16.left=ifelse(votechoice.dem.16%in%c("Bernie Sanders"),1,votechoice.dem.16.left))%>%
  mutate(votechoice.dem.08.left=ifelse(votechoice.dem.08%in%c("Hillary Clinton", "Someone else", "John Edwards"),0,votechoice.dem.08.left))%>%#"Someone else" is coded as moderate here because it includes the moderate candidates
  mutate(votechoice.dem.16.left=ifelse(votechoice.dem.16%in%c("Martin O'Malley", "Hillary Clinton"),0,votechoice.dem.16.left))%>%
  group_by(votechoice.dem.08.left, votechoice.dem.16.left)%>%
  dplyr::summarize(count = sum(count, na.rm = TRUE))%>%
  ungroup()%>%
  mutate(fraction=count/sum(count))%>%
  na.omit(.)#remove 2008 NAs

# The positional indexes below assume the four retained rows are ordered
# (0,0), (0,1), (1,0), (1,1) -- TODO confirm against the printed table.
matrix.dems.08.16$fraction[1]#34% with moderate wing in both primaries
matrix.dems.08.16$fraction[4]#24% with left wing in both primaries
sum(matrix.dems.08.16$fraction[2:3])#42 switch
# Chi-squared test on the retained counts arranged as a two-column matrix.
chisq.test(matrix(matrix.dems.08.16$count, ncol = 2))#16.37 X^2 

# Share of the 2016-2020 Democratic panel accounted for by a set of transition
# rows: the sum of their counts over the panel total.
share.dem.16.20 <- function(rows) {
  sum(rows$count) / sum(subh.dem.exp.16.20.within$count)
}

share.dem.16.20(filter(
  subh.dem.exp.16.20.within,
  votechoice.dem.20 == "Bernie Sanders",
  votechoice.dem.16 == "Hillary Clinton"
)) #4% with Clinton then Sanders

share.dem.16.20(filter(
  subh.dem.exp.16.20.within,
  votechoice.dem.16 == "Bernie Sanders",
  !(votechoice.dem.20 %in% c("Bernie Sanders", "Elizabeth Warren"))
)) #20% with Sanders then non-left

# Cross-tabulate 2016 and 2020 Democratic primary choices by faction:
# 1 = left wing, 0 = moderate wing, NA = candidate not assigned to either.
# `fraction` is computed over every row, including the NA-faction rows, before
# na.omit() drops them, so the retained fractions need not sum to 1.
matrix.dems.16.20<-sub.dem.w.na%>%
  filter(!is.na(votechoice.dem.16))%>% 
  filter(!is.na(votechoice.dem.20))%>%
  dplyr::select(votechoice.dem.20, votechoice.dem.16,count)%>%
  mutate(votechoice.dem.16.left=NA)%>%
  mutate(votechoice.dem.20.left=NA)%>%
  mutate(votechoice.dem.16.left=ifelse(votechoice.dem.16%in%c("Bernie Sanders"),1,votechoice.dem.16.left))%>%
  mutate(votechoice.dem.20.left=ifelse(votechoice.dem.20%in%c("Elizabeth Warren", "Bernie Sanders"),1,votechoice.dem.20.left))%>%
  mutate(votechoice.dem.16.left=ifelse(votechoice.dem.16%in%c("Hillary Clinton", "Martin O'Malley"),0,votechoice.dem.16.left))%>%
  mutate(votechoice.dem.20.left=ifelse(votechoice.dem.20%in%c("Amy Klobuchar","Deval Patrick", "Andrew Yang", "Joe Biden", "Michael Bloomberg", "Tom Steyer","Pete Buttigieg"),0,votechoice.dem.20.left))%>%#we avoid assuming that "someone else" is in either faction
  group_by(votechoice.dem.16.left, votechoice.dem.20.left)%>%
  dplyr::summarize(count = sum(count, na.rm = TRUE))%>%
  ungroup()%>%
  mutate(fraction=count/sum(count))%>%
  na.omit(.)
  
# The positional indexes below assume the four retained rows are ordered
# (0,0), (0,1), (1,0), (1,1) -- TODO confirm against the printed table.
matrix.dems.16.20$fraction[1]#45% moderate
matrix.dems.16.20$fraction[4]#16% left
sum(matrix.dems.16.20$fraction[2:3])#34% inconsistent

# Cross-tabulate 2008 and 2016 Republican primary choices by faction:
# 1 = candidates coded as more conservative, 0 = candidates coded as less
# conservative, NA = candidate not assigned to either.
# `fraction` is computed over every row, including the NA-faction rows, before
# na.omit() drops them, so the retained fractions need not sum to 1.
matrix.gop.08.16<-subh.gop.exp.08.16.within%>%
  mutate(votechoice.gop.08.right=NA)%>%
  mutate(votechoice.gop.16.right=NA)%>%
  mutate(votechoice.gop.08.right=ifelse(votechoice.rep.08%in%c("Sam Brownback","Mitt Romney", "Mike Huckabee", "Fred Thompson","Ron Paul"),1,votechoice.gop.08.right))%>%
  mutate(votechoice.gop.16.right=ifelse(votechoice.rep.16%in%c("Ben Carson", "Ted Cruz", "Mike Huckabee"),1,votechoice.gop.16.right))%>%
  mutate(votechoice.gop.08.right=ifelse(votechoice.rep.08%in%c("John McCain", "Tom Tancredo", "Duncan Hunter", "Rudy Giuliani"),0,votechoice.gop.08.right))%>%
  mutate(votechoice.gop.16.right=ifelse(votechoice.rep.16%in%c("Donald Trump", "John Kasich","Carly Fiorina","Chris Christie","Jeb Bush","Rand Paul","Marco Rubio"),0,votechoice.gop.16.right))%>%
  group_by(votechoice.gop.08.right, votechoice.gop.16.right)%>%
  dplyr::summarize(count = sum(count, na.rm = TRUE))%>%
  ungroup()%>%
  mutate(fraction=count/sum(count))%>%
  na.omit(.)
# The positional indexes below assume the four retained rows are ordered
# (0,0), (0,1), (1,0), (1,1) -- TODO confirm against the printed table.
matrix.gop.08.16$fraction[1]#41% stayed less cons
matrix.gop.08.16$fraction[4]#19% stayed cons
sum(matrix.gop.08.16$fraction[2:3])#40% switched

# Chi-squared test on the retained counts arranged as a two-column matrix.
chisq.test(matrix(matrix.gop.08.16$count, ncol = 2))#13.96 X^2 

# Figure A1: alluvial plot of 2008 -> 2016 Democratic primary choices, with
# flows coloured by the 2008 choice.
pdf("figureA1.pdf")
fig.a1 <- ggplot(subh.dem.exp.08.16.within, aes(axis1 = votechoice.dem.08, axis2 = votechoice.dem.16, y = count)) +
  geom_alluvium(aes(fill = votechoice.dem.08)) +
  geom_stratum(fill = "white") +
  geom_label(stat = "stratum", aes(label = after_stat(stratum)), fill = "white", color = "black")+
  labs(y="",x="")+
  theme_minimal() +
  scale_x_discrete(limits = c("2008", "2016")) +
  # NOTE(review): the collapsed "Other" category has no colour assigned here,
  # so it falls back to the scale's NA colour -- confirm that is intended.
  scale_fill_manual(values = c("Barack Obama" = "lightblue",
                               "John Edwards" = "lightblue",
                               "Hillary Clinton" = "darkblue")) +
  theme(legend.position = "none")#figure A1
# Print explicitly: a bare ggplot object is only drawn by top-level
# auto-printing, which does not happen when the script is run via source().
print(fig.a1)
dev.off()

#figure A2----
# Figure A2: alluvial plot of 2016 -> 2020 Democratic primary choices, with
# flows coloured by the 2016 choice.
pdf("figureA2.pdf")
fig.a2 <- ggplot(subh.dem.exp.16.20.within, aes(axis1 = votechoice.dem.16, axis2 = votechoice.dem.20, y = count)) +
  geom_alluvium(aes(fill = votechoice.dem.16)) +
  geom_stratum(fill = "white") +
  geom_label(stat = "stratum", aes(label = after_stat(stratum)), fill = "white", color = "black")+
  labs(y="",x="")+
  theme_minimal() +
  # The two axes are the 2016 and 2020 primaries, so they are labelled as such.
  scale_x_discrete(limits = c("2016", "2020")) +
  scale_fill_manual(values = c("Martin O'Malley" = "darkblue",
                               "Hillary Clinton" = "darkblue",
                               "Bernie Sanders" = "lightblue")) +
  theme(legend.position = "none")#figure A2
# Print explicitly: a bare ggplot object is only drawn by top-level
# auto-printing, which does not happen when the script is run via source().
print(fig.a2)
dev.off()

# table 1 ------
#collapse minor candidates
minor.dem.20.t1 <- c("Andrew Yang", "Deval Patrick", "Someone else", "Tom Steyer")
dta22$votechoice.dem.20.colap <- ifelse(
  dta22$votechoice.dem.20 %in% minor.dem.20.t1,
  "Other",
  as.character(dta22$votechoice.dem.20)
)

# Estimation sample for table 1: complete cases on the outcome item and all
# covariates, plus party ID divided by 100.
t1.complete <- na.omit(
  dplyr::select(
    dta22,
    VP1A_DEM_14, IDEO10, BLACK6, AGE6, SANDERS10P, FEMALE6, EDYEARS6, PID6, HISP6
  )
)
t1.reg <- mutate(t1.complete, PID6_RES = PID6 / 100) #rescale party ID

# Logit models of backing Sanders or Warren in the 2020 primary. The outcome is
# TRUE for those two candidates and FALSE for every other recorded answer.
lout1 <- glm(
  I(VP1A_DEM_14 %in% c("Bernie Sanders", "Elizabeth Warren")) ~ IDEO10,
  family = binomial(link = "logit"),
  data = t1.reg
)
lout2 <- glm(
  I(VP1A_DEM_14 %in% c("Bernie Sanders", "Elizabeth Warren")) ~ IDEO10 + SANDERS10P,
  family = binomial(link = "logit"),
  data = t1.reg
)
lout3 <- glm(
  I(VP1A_DEM_14 %in% c("Bernie Sanders", "Elizabeth Warren")) ~
    IDEO10 + SANDERS10P + FEMALE6 + BLACK6 + HISP6 + EDYEARS6 + PID6_RES + AGE6,
  family = binomial(link = "logit"),
  data = t1.reg
)

# Write the three models to table1.tex. The coefficient labels are positional
# and follow the order of the terms in the third model.
texreg(
  list(lout1, lout2, lout3),
  digits = 3,
  stars = 0.05,
  caption = "Logistic regression models of 2020 Sanders or Warren support.",
  label = "t:sanderswarren20",
  custom.coef.names = c(
    "(Intercept)", "Conservative Ideology '16", "Sanders '16", "Female '12",
    "Black '12", "Hispanic '12", "Education '12", "Party ID '12 (Rescaled)",
    "Age '12"
  ),
  file = "table1.tex"
)

# Post-process the written file: drop lines 27-30 and shorten the N label.
table1 <- readLines("table1.tex")
table1 <- gsub("Num\\. obs\\.", "N", table1[-(27:30)])
writeLines(table1, "table1.tex")#table 1

# Covariate profile for table 1 predictions: the given ideology score with the
# other covariates held at their sample means. VP1A_DEM_14_B is not a model
# term and is carried along unchanged.
t1.mean.profile <- function(ideo) {
  data.frame(
    IDEO10 = ideo,
    VP1A_DEM_14_B = "Other",
    SANDERS10P = mean(t1.reg$SANDERS10P, na.rm = TRUE),
    FEMALE6 = mean(t1.reg$FEMALE6, na.rm = TRUE),
    BLACK6 = mean(t1.reg$BLACK6, na.rm = TRUE),
    HISP6 = mean(t1.reg$HISP6),
    EDYEARS6 = mean(t1.reg$EDYEARS6),
    PID6_RES = mean(t1.reg$PID6_RES, na.rm = TRUE),
    AGE6 = mean(t1.reg$AGE6)
  )
}

#w/o other independent variables
lib <- predict(lout1, data.frame(IDEO10 = 2), type = "response")
mod <- predict(lout1, data.frame(IDEO10 = 4), type = "response")
round(lib - mod, 2)#14 percentage point decrease

#backing sanders
sanders.16 <- predict(
  lout2,
  data.frame(IDEO10 = 4, VP1A_DEM_14_B = "Other", SANDERS10P = 1),
  type = "response"
)
not.sanders.16 <- predict(
  lout2,
  data.frame(IDEO10 = 4, VP1A_DEM_14_B = "Other", SANDERS10P = 0),
  type = "response"
)
round(sanders.16 - not.sanders.16, 2)#18 percentage point jump

#conditioning on education,gender,race,partisanship
lib <- predict(lout3, t1.mean.profile(2), type = "response")
mod <- predict(lout3, t1.mean.profile(4), type = "response")
round(mod - lib, 2)#11 percentage point decrease

#NOTE: Changed to include NAs in the outcome (e.g., refused) for consistency.

# table 2 ------
# Estimation sample for table 2: complete cases on the 2016 and 2008 primary
# items and all covariates.
t2.reg <- na.omit(
  dplyr::select(
    dta22,
    VP1A_REP_16_10, IDEO6, BLACK6, AGE6, VP1A_REP_1, FEMALE6, EDYEARS6, PID6, HISP6
  )
)

# Logit models of backing Trump in the 2016 primary. The outcome is TRUE for
# Trump and FALSE for every other recorded answer.
lout1r <- glm(
  I(VP1A_REP_16_10 %in% c("Donald Trump")) ~ IDEO6,
  family = binomial(link = "logit"),
  data = t2.reg
)
lout2r <- glm(
  I(VP1A_REP_16_10 %in% c("Donald Trump")) ~ IDEO6 + as.factor(VP1A_REP_1),
  family = binomial(link = "logit"),
  data = t2.reg
)
lout3r <- glm(
  I(VP1A_REP_16_10 %in% c("Donald Trump")) ~
    IDEO6 + as.factor(VP1A_REP_1) + BLACK6 + HISP6 + EDYEARS6 + PID6 + AGE6,
  family = binomial(link = "logit"),
  data = t2.reg
)

# Write the three Trump-support models to table2.tex.
# NOTE(review): the coefficient labels are applied by position, so the 2008
# candidate labels must follow the factor level order of VP1A_REP_1 -- confirm
# against the unlabeled model output.
texreg(list(lout1r,lout2r,lout3r),
       digits=3,
       stars=0.05,
       caption = "Logistic regression models of 2016 Trump support. 2008 support for Rudy Giuliani is the omitted baseline category.",
       label="t:trump16",
       # The 2008 candidate is Tom Tancredo (the name used when coding the
       # 2008 Republican choices), not "Ron Tancredo".
       custom.coef.names = c("(Intercept)", "Conservative Ideology '12", "2008: Mike Huckabee",
                             "2008: John McCain", "2008: Mitt Romney", "2008: Fred Thompson", "2008: Sam Brownback", "2008: Duncan Hunter", "2008: Ron Paul",
                             "2008: Tom Tancredo",  "Black '12", "Hispanic '12", "Education '12", "Party ID '12", "Age '12"),
       file="table2.tex")

# Post-process the written file: drop lines 39-42 and shorten the N label.
table2 <- readLines("table2.tex")
table2<-table2[-c(39:42)]
table2 <- gsub("Num\\. obs\\.", "N", table2)
writeLines(table2, "table2.tex")#table 2

# Covariate profile for table 2 predictions: the given ideology score and 2008
# primary choice, with the remaining covariates held at their sample means.
t2.mean.profile <- function(ideo, choice.08) {
  data.frame(
    IDEO6 = ideo,
    VP1A_REP_1 = choice.08,
    FEMALE6 = mean(t2.reg$FEMALE6, na.rm = TRUE),
    BLACK6 = mean(t2.reg$BLACK6, na.rm = TRUE),
    HISP6 = mean(t2.reg$HISP6),
    EDYEARS6 = mean(t2.reg$EDYEARS6),
    PID6 = mean(t2.reg$PID6, na.rm = TRUE),
    AGE6 = mean(t2.reg$AGE6)
  )
}

#w/o other independent variables
mod <- predict(lout1r, data.frame(IDEO6 = 4), type = "response")
con <- predict(lout1r, data.frame(IDEO6 = 6), type = "response")
round(con - mod, 2)#9 percentage point decrease

#conditional on primary choice 2008
mod <- predict(lout2r, t2.mean.profile(4, "Rudy Giuliani"), type = "response")
con <- predict(lout2r, t2.mean.profile(6, "Rudy Giuliani"), type = "response")
round(con - mod, 2)#8 percentage point decrease

#comparing giuliani and romney
giul <- predict(lout2r, t2.mean.profile(6, "Rudy Giuliani"), type = "response")
romn <- predict(lout2r, t2.mean.profile(6, "Mitt Romney"), type = "response")
round(giul - romn, 2)#20 percentage point increase

# table 3 -----

#NOTE: This analysis is unweighted.

yg23 <- read.csv("yougov23.csv")

# Bivariate and covariate-adjusted logits of Trump support. These assignments
# reuse, and therefore overwrite, the lout1r/lout2r objects from table 2.
lout1r <- glm(
  trump_support ~ ideo5,
  family = binomial(link = "logit"),
  data = yg23
)
lout2r <- glm(
  trump_support ~ ideo5 + female + black + hispanic + age + educ + pid7,
  family = binomial(link = "logit"),
  data = yg23
)

# Write both models to table3.tex. The coefficient labels are positional and
# follow the order of the terms in the second model.
texreg(
  list(lout1r, lout2r),
  digits = 3,
  stars = 0.05,
  caption = "Logistic regressions of support for Donald Trump on covariates. YouGov November 2023 survey of Americans, sample restricted to self-described Republican primary voters.",
  label = "t:trump23",
  custom.coef.names = c(
    "(Intercept)", "Conservative Ideology '23", "Female '23", "Black '23",
    "Hispanic '23", "Age '23", "Education '23", "Party ID '23"
  ),
  file = "table3.tex"
)

# Post-process the written file: drop lines 25-28 and shorten the N label.
table3 <- readLines("table3.tex")
table3 <- gsub("Num\\. obs\\.", "N", table3[-(25:28)])
writeLines(table3, "table3.tex")#table 3


# Covariate profile for table 3 predictions: the given ideology score with the
# other covariates held at their means in yg23. Every mean drops missing values
# so that a single NA in the raw CSV cannot turn the prediction into NA.
t3.mean.profile <- function(ideo) {
  data.frame(
    ideo5 = ideo,
    female = mean(yg23$female, na.rm = TRUE),
    black = mean(yg23$black, na.rm = TRUE),
    hispanic = mean(yg23$hispanic, na.rm = TRUE),
    educ = mean(yg23$educ, na.rm = TRUE),
    pid7 = mean(yg23$pid7, na.rm = TRUE),
    age = mean(yg23$age, na.rm = TRUE)
  )
}

#w/o other independent variables
mod<-predict(lout1r,data.frame(list("ideo5"=3)),type="response")
con<-predict(lout1r,data.frame(list("ideo5"=5)),type="response")
round(con-mod,2)#18 percentage point increase

#conditioning on partisanship and others
mod<-predict(lout2r,t3.mean.profile(3),type="response")

con<-predict(lout2r,t3.mean.profile(5),type="response")
round(con-mod,2)#5 percentage point increase

# table A2 -----

# Party indicators built from PID6. %in% returns FALSE for a missing PID6, so
# respondents without a party ID are coded 0 rather than NA.
dta22$REPUBLICAN6<-1*(dta22$PID6%in%c(5,6,7))
dta22$DEMOCRAT6<-1*(dta22$PID6%in%c(1,2,3))

vars <- c("EDYEARS6","BLACK6","HISP6","FEMALE6","INCOME6","AGE6","REPUBLICAN6","DEMOCRAT6")

# Row filters for the two later columns: rows whose w10 / w14 value is not 0.
# A missing value also passes the filter, because %in% never returns NA.
# NOTE(review): w10/w14 presumably flag participation in those waves -- confirm.
keep.w10 <- !dta22$w10 %in% c(0)
keep.w14 <- !dta22$w14 %in% c(0)

# One row per variable plus a final row of sample sizes; one column for all
# rows, one for the w10 filter and one for the w14 filter.
rmat <- matrix(NA,length(vars)+1,3)
for(i in seq_along(vars)){
  # Look the column up by name directly instead of building and evaluating code.
  hold <- dta22[[vars[i]]]
  rmat[i,1] <- mean(hold,na.rm=TRUE)
  rmat[i,2] <- mean(hold[keep.w10],na.rm=TRUE)
  rmat[i,3] <- mean(hold[keep.w14],na.rm=TRUE)
}

# Final row: number of rows overall and under each filter.
rmat[length(vars)+1,1] <- nrow(dta22)
rmat[length(vars)+1,2] <- sum(keep.w10)
rmat[length(vars)+1,3] <- sum(keep.w14)

# Turn the matrix of means into a labelled data frame, one column per wave.
rmat<-rmat%>%
  as.data.frame(.)%>%
  setNames(c("Wave 7 (2012-13)", "Wave 10 (2016)", "Wave 14 (2020)"))

# Row labels follow the order of the variables summarised, plus the final "n" row.
rownames(rmat)<-c("Education (years)", "Black", "Hispanic", "Female", "Income", "Age", "Republican", "Democrat", "n")
# Round every column to three decimals before the columns are converted to text.
rmat$`Wave 7 (2012-13)`<-round(rmat$`Wave 7 (2012-13)`,3)
rmat$`Wave 10 (2016)`<-round(rmat$`Wave 10 (2016)`,3)
rmat$`Wave 14 (2020)`<-round(rmat$`Wave 14 (2020)`,3)

# Replace the last ("n") row with character values.
# NOTE(review): assigning character values into one row appears to coerce every
# column to character, which is why the rounding is done beforehand -- confirm
# that the xtable digits argument still has the intended effect.
rmat[nrow(rmat),]<-as.character(rmat[nrow(rmat),])
# Write the table to tableA2.tex.
print(xtable(rmat,
       digits=c(0,3,3,3),
       label="t:attrition", 
       caption=c("This table allows for an analysis of attrition by reporting the wave 7 (2012-13) demographics for respondents who remained for waves 10 (2016) and 14 (2020).")),
      file="tableA2.tex")#table A2

# table A3 -----
# Read the summary-table skeleton and add one empty (NA) column per statistic,
# to be filled in sample by sample.
key <- read.csv("iscap-summary-table.csv")
for (stat.col in c("Ideology_Lib", "Age", "Female", "Ed.Years", "Income",
                   "PartyID", "Black", "Hispanic")) {
  key[[stat.col]] <- NA
}

#democratic primary sample (all three elections)
sub.ta3.dem <- dta22 %>%
  dplyr::select(votechoice.dem.20, votechoice.dem.16, votechoice.dem.08, ideology_4, AGE6,FEMALE6,EDYEARS6,INCOME6,PID6,BLACK6,HISP6)%>% #subseting for colMeans
  filter(!is.na(votechoice.dem.20)) %>%
  filter(!is.na(votechoice.dem.16)) %>%
  filter(!is.na(votechoice.dem.08)) %>%
  dplyr::select(-c(votechoice.dem.08, votechoice.dem.16, votechoice.dem.20))
sub.ta3.dem$ideology_4 <- as.numeric(sub.ta3.dem$ideology_4)
mean.v1 <- colMeans(sub.ta3.dem, na.rm = TRUE)
key$Obs[1]<-nrow(sub.ta3.dem)
key$Ideology_Lib[1] <- mean.v1[1]
key$Age[1] <- mean.v1[2]
key$Female[1] <- mean.v1[3]
key$Ed.Years[1] <- mean.v1[4]
key$Income[1] <- mean.v1[5]
key$PartyID[1] <- mean.v1[6]
key$Black[1] <- mean.v1[7]
key$Hispanic[1] <- mean.v1[8]

#republican primary sample (both elections)
sub.ta3.gop <- dta22 %>%
  dplyr::select(votechoice.rep.08, votechoice.rep.16, ideology_4, AGE6,FEMALE6,EDYEARS6,INCOME6,PID6,BLACK6,HISP6)%>% #subseting for colMeans
  filter(!is.na(votechoice.rep.08)) %>%
  filter(!is.na(votechoice.rep.16)) %>%
  dplyr::select(-c(votechoice.rep.16, votechoice.rep.08))

sub.ta3.gop$ideology_4 <- as.numeric(sub.ta3.gop$ideology_4)
mean.v1 <- colMeans(sub.ta3.gop, na.rm = TRUE)
key$Obs[2]<-nrow(sub.ta3.gop)
key$Ideology_Lib[2] <- mean.v1[1]
key$Age[2] <- mean.v1[2]
key$Female[2] <- mean.v1[3]
key$Ed.Years[2] <- mean.v1[4]
key$Income[2] <- mean.v1[5]
key$PartyID[2] <- mean.v1[6]
key$Black[2] <- mean.v1[7]
key$Hispanic[2] <- mean.v1[8]

#general election sample (all three elections)
sub.ta3.general <- dta22 %>%
  dplyr::select(votechoice.08, votechoice.16, votechoice.20, ideology_4, AGE6,FEMALE6,EDYEARS6,INCOME6,PID6,BLACK6,HISP6)%>% #subseting for colMeans
  filter(!is.na(votechoice.20)) %>%
  filter(!is.na(votechoice.16)) %>%
  filter(!is.na(votechoice.08)) %>%
  dplyr::select(-c(votechoice.08, votechoice.16, votechoice.20)) 
sub.ta3.general$ideology_4 <- as.numeric(sub.ta3.general$ideology_4)
mean.v1 <- colMeans(sub.ta3.general, na.rm = TRUE)
key$Obs[3]<-nrow(sub.ta3.general)
key$Ideology_Lib[3] <- mean.v1[1]
key$Age[3] <- mean.v1[2]
key$Female[3] <- mean.v1[3]
key$Ed.Years[3] <- mean.v1[4]
key$Income[3] <- mean.v1[5]
key$PartyID[3] <- mean.v1[6]
key$Black[3] <- mean.v1[7]
key$Hispanic[3] <- mean.v1[8]

#democratic primary sample (only 2016 and 2020)
sub.ta3.dem2 <- dta22 %>%
  dplyr::select(votechoice.dem.16, votechoice.dem.20, ideology_4, AGE6,FEMALE6,EDYEARS6,INCOME6,PID6,BLACK6,HISP6)%>% #subseting for colMeans
  filter(!is.na(votechoice.dem.20)) %>%
  filter(!is.na(votechoice.dem.16)) %>%
  dplyr::select(-c(votechoice.dem.16, votechoice.dem.20))
sub.ta3.dem2$ideology_4 <- as.numeric(sub.ta3.dem2$ideology_4)
mean.v1 <- colMeans(sub.ta3.dem2, na.rm = TRUE)
key$Obs[5]<-nrow(sub.ta3.dem2)
key$Ideology_Lib[5] <- mean.v1[1]
key$Age[5] <- mean.v1[2]
key$Female[5] <- mean.v1[3]
key$Ed.Years[5] <- mean.v1[4]
key$Income[5] <- mean.v1[5]
key$PartyID[5] <- mean.v1[6]
key$Black[5] <- mean.v1[7]
key$Hispanic[5] <- mean.v1[8]

#democratic primary sample (only 2008 and 2016)
sub.ta3.dem2 <- dta22 %>%
  dplyr::select(votechoice.dem.08, votechoice.dem.16, ideology_4, AGE6,FEMALE6,EDYEARS6,INCOME6,PID6,BLACK6,HISP6)%>% #subseting for colMeans
  filter(!is.na(votechoice.dem.08)) %>%
  filter(!is.na(votechoice.dem.16)) %>%
  dplyr::select(-c(votechoice.dem.16, votechoice.dem.08))
sub.ta3.dem2$ideology_4 <- as.numeric(sub.ta3.dem2$ideology_4)
mean.v1 <- colMeans(sub.ta3.dem2, na.rm = TRUE)
key$Obs[4]<-nrow(sub.ta3.dem2)
key$Ideology_Lib[4] <- mean.v1[1]
key$Age[4] <- mean.v1[2]
key$Female[4] <- mean.v1[3]
key$Ed.Years[4] <- mean.v1[4]
key$Income[4] <- mean.v1[5]
key$PartyID[4] <- mean.v1[6]
key$Black[4] <- mean.v1[7]
key$Hispanic[4] <- mean.v1[8]

key<-key%>%
  dplyr::rename(`Ideology (Lib.)`=Ideology_Lib)%>%
  dplyr::rename(`Tables`=Figures.Tables)%>%
  dplyr::select(-Source)%>%
  dplyr::select(Sample,Obs,everything())

print(xtable(key,
             digits=c(0,rep(2,11)),
             label="t:panel:summary", 
             caption=c("Summary of ISCAP panel, and its use in the analysis.")),
      scalebox=0.8,
      include.rownames=F,
      file="tableA3.tex")#table A3

# table A4 ---- 
# Collapse sub.dem.non.na to one row per (2008, 2016, 2020) Democratic primary
# pathway, adding up the existing `count` column within each pathway.
subh.dem <- sub.dem.non.na %>%
  dplyr::select(
    votechoice.dem.08, votechoice.dem.16, votechoice.dem.20, count
  ) %>%
  group_by(votechoice.dem.08, votechoice.dem.16, votechoice.dem.20) %>%
  dplyr::summarize(count = sum(count, na.rm = TRUE))

# Each pathway's share of the summed count, largest share first.
subh.dem.1 <- ungroup(subh.dem)
subh.dem.1 <- dplyr::mutate(subh.dem.1, n.frac = count / sum(count))
subh.dem.1 <- arrange(subh.dem.1, desc(n.frac))

nrow(subh.dem.1) # 53 combinations
sum(subh.dem.1$n.frac[1:7]) # 53% of respondents in top 7

# Same table with shares on a 0-100 scale; used for the Herfindahl index.
subh.dem.1.hhi <- mutate(subh.dem.1, n.frac = n.frac * 100) # for hhi

# Share on pathways that cross between the two groupings: Sanders in 2016 but
# neither Sanders nor Warren in 2020, or the reverse.
subh.dem.1 %>%
  filter(
    (votechoice.dem.16 == "Bernie Sanders" &
       !votechoice.dem.20 %in% c("Bernie Sanders", "Elizabeth Warren")) |
      (votechoice.dem.16 != "Bernie Sanders" &
         votechoice.dem.20 %in% c("Bernie Sanders", "Elizabeth Warren"))
  ) %>%
  dplyr::pull(n.frac) %>%
  sum() # 37% switch

# Same sum after dropping 2020 "Someone else" pathways. The shares are still
# relative to the full set of pathways.
subh.dem.1 %>%
  filter(votechoice.dem.20 != "Someone else") %>% # excluding to avoid assuming factional inconsistency
  filter(
    (votechoice.dem.16 == "Bernie Sanders" &
       !votechoice.dem.20 %in% c("Bernie Sanders", "Elizabeth Warren")) |
      (votechoice.dem.16 != "Bernie Sanders" &
         votechoice.dem.20 %in% c("Bernie Sanders", "Elizabeth Warren"))
  ) %>%
  dplyr::pull(n.frac) %>%
  sum() # 36% switch

# Printed table: pathways above 1% only, share formatted as a percent string.
subh.dem.1 <- filter(subh.dem.1, n.frac > 0.01)
subh.dem.1 <- mutate(subh.dem.1, n.frac = paste0(round(n.frac * 100, 2), "%"))
subh.dem.1 <- setNames(
  subh.dem.1,
  c("2008", "2016", "2020", "# Respondents", "% of Respondents")
)

print(
  xtable(
    subh.dem.1,
    label = "pathways-support-dem",
    caption = c("Percentage of vote pathways in Democratic primaries among all individuals who reported a preference in each of the Democratic primaries in 2008, 2016, and 2020.")
  ),
  include.rownames = FALSE,
  file = "tableA4.tex"
) # table A4

# table A5 ---- 
# Collapse sub.rep.non.na to one row per (2008, 2016) Republican primary
# pathway, adding up the existing `count` column within each pathway.
subh.rep <- sub.rep.non.na %>%
  dplyr::select(votechoice.rep.08, votechoice.rep.16, count) %>%
  group_by(votechoice.rep.08, votechoice.rep.16) %>%
  dplyr::summarize(count = sum(count, na.rm = TRUE))

# Each pathway's share of the summed count, largest share first.
subh.rep.1 <- ungroup(subh.rep)
subh.rep.1 <- dplyr::mutate(subh.rep.1, n.frac = count / sum(count))
subh.rep.1 <- arrange(subh.rep.1, desc(n.frac))

# Same table with shares on a 0-100 scale; used for the Herfindahl index.
subh.rep.1.hhi <- mutate(subh.rep.1, n.frac = n.frac * 100) # for hhi

# Printed table: pathways above 1% only, share formatted as a percent string.
subh.rep.1 <- filter(subh.rep.1, n.frac > 0.01)
subh.rep.1 <- mutate(subh.rep.1, n.frac = paste0(round(n.frac * 100, 2), "%"))
subh.rep.1 <- setNames(
  subh.rep.1,
  c("2008", "2016", "# Respondents", "% of Respondents")
)

print(
  xtable(
    subh.rep.1,
    label = "pathways-support-rep",
    caption = c("Percentage of vote pathways in Republican primaries among all individuals who reported a preference in each of the Republican primaries in 2008 and 2016.")
  ),
  include.rownames = FALSE,
  file = "tableA5.tex"
) # table A5

# table A6 ---- 
# Respondents with a recoded general-election choice in all three years.
subh.gen <- dta22 %>%
  dplyr::select(votechoice.08, votechoice.16, votechoice.20) %>%
  na.omit()

# Number of respondents on each (2008, 2016, 2020) pathway and its share of
# the total, largest share first.
subh.gen.1 <- group_by(subh.gen, votechoice.08, votechoice.16, votechoice.20)
subh.gen.1 <- ungroup(dplyr::summarize(subh.gen.1, n = n()))
subh.gen.1 <- dplyr::mutate(subh.gen.1, n.frac = n / sum(n))
subh.gen.1 <- arrange(subh.gen.1, desc(n.frac))

# Same table with shares on a 0-100 scale; used for the Herfindahl index.
subh.gen.1.hhi <- mutate(subh.gen.1, n.frac = n.frac * 100) # for hhi

# Concentration of pathway shares for the Democratic primary, Republican
# primary and general election tables respectively.
DescTools::Herfindahl(subh.dem.1.hhi$n.frac) # 0.06
DescTools::Herfindahl(subh.rep.1.hhi$n.frac) # 0.05
DescTools::Herfindahl(subh.gen.1.hhi$n.frac) # 0.17

# Printed table: pathways above 1% only, share formatted as a percent string.
subh.gen.1 <- filter(subh.gen.1, n.frac > 0.01)
subh.gen.1 <- mutate(subh.gen.1, n.frac = paste0(round(n.frac * 100, 2), "%"))
subh.gen.1 <- setNames(
  subh.gen.1,
  c("2008", "2016", "2020", "# Respondents", "% of Respondents")
)

print(
  xtable(
    subh.gen.1,
    label = "pathways-support-general",
    caption = c("Percentage of vote pathways in general elections among all individuals who reported a preference in each of the general elections in 2008, 2016, and 2020.")
  ),
  include.rownames = FALSE,
  file = "tableA6.tex"
) # table A6

# table A7 ---- 
# Numeric 1-7 ideology score from the ideology_4 labels: the position of each
# label in this vector is its score (1 = "extreme lib", 7 = "extreme con").
# Labels not listed here, and missing values, become NA.
ideo.labels <- c("extreme lib", "lib", "slight lib", "moderate",
                 "slight con", "con", "extreme con")
dta22$IDEO4 <- as.numeric(match(as.character(dta22$ideology_4), ideo.labels))

# Write one "mean ideology by candidate" table to `file`.
#   data     data frame holding IDEO4 and the column named by `vote.var`
#   vote.var name (string) of the primary vote-choice column to group by
#   label    LaTeX label passed to xtable()
#   caption  LaTeX caption passed to xtable()
#   file     output .tex path
# Rows with a missing vote choice are dropped. Each remaining candidate gets
# the mean of IDEO4 (ignoring NAs) and the number of rows, including rows
# whose IDEO4 is missing. Candidates are sorted by ascending mean.
print_ideo_table <- function(data, vote.var, label, caption, file) {
  tab <- data %>%
    filter(!is.na(.data[[vote.var]])) %>%
    group_by(across(all_of(vote.var))) %>%
    dplyr::summarise(mean_ideo = mean(IDEO4, na.rm = TRUE),
                     n = n()) %>%
    arrange(mean_ideo) %>%
    setNames(c("Candidate", "Mean Ideo.", "n"))
  invisible(print(xtable(tab, label = label, caption = caption),
                  include.rownames = FALSE,
                  file = file))
}

print_ideo_table(
  dta22, "votechoice.dem.08",
  label = "t:iscap:2008:dems",
  caption = c("For 2008, this table presents the average ideology on a 1-7 scale for Democratic respondents to the 2008 January-March ISCAP wave."),
  file = "tableA7.tex"
) # table A7

# table A8 ----
print_ideo_table(
  dta22, "votechoice.dem.16",
  label = "t:iscap:2016:dems",
  caption = c("For 2016, this table presents the average ideology on a 1-7 scale for Democratic respondents to the January-February 2016 ISCAP wave."),
  file = "tableA8.tex"
) # table A8

# table A9 ----
print_ideo_table(
  dta22, "votechoice.dem.20",
  label = "t:iscap:2020:dems",
  caption = c("For 2020, this table presents the average ideology on a 1-7 scale for Democratic respondents to the January 2020 ISCAP wave."),
  file = "tableA9.tex"
) # table A9

# table A10 ----

print_ideo_table(
  dta22, "votechoice.rep.08",
  label = "t:iscap:2008:gop",
  caption = c("For 2008, this table presents the average ideology on a 1-7 scale for Republican respondents to the 2007 October-December ISCAP wave."),
  file = "tableA10.tex"
) # table A10

# table A11 ----
print_ideo_table(
  dta22, "votechoice.rep.16",
  label = "t:iscap:2016:gop",
  caption = c("For 2016, this table presents the average ideology on a 1-7 scale for Republican respondents to the January-February 2016 ISCAP wave."),
  file = "tableA11.tex"
) # table A11
